# Libraries ------

library(tidyverse)
library(haven)
library(readxl)
library(fixest)
library(splines)
library(EnvStats)

# Data -------


usd_value <- read_excel("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/statistic_id1032048_value-of-one-us-dollar-in-the-united-states-1635-2020.xlsx",
                        sheet=2, skip = 4,col_names = c("year", "Value"),col_types = c("numeric", "numeric")) %>% 
  filter(year>1970) %>% 
  mutate(VSL = 820000/Value,
         VSL_low = 400000/Value,
         VSL_high = 2150000/Value)

gbd_hap <- read_csv("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/IHME-GBD_2019_DATA-21722261-1.csv")

ecuador_mortality_population <- 
  read_xlsx("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx",sheet = 1) %>% 
  filter(`Region, subregion, country or area *`=="Ecuador") %>% 
  dplyr::select(`Region, subregion, country or area *`, Year, 
                `Total Population, as of 1 January (thousands)`, 
                `Total Deaths (thousands)`,`Crude Death Rate (deaths per 1,000 population)`) %>% 
  filter(Year>=1979) %>% 
  rename(population = `Total Population, as of 1 January (thousands)`,
         deaths = `Total Deaths (thousands)`,
         death_rate = `Crude Death Rate (deaths per 1,000 population)`) %>%
  mutate(population = as.numeric(population),
         deaths = as.numeric(deaths),
         death_rate = as.numeric(death_rate)) %>%
  dplyr::select(Year, deaths, population, death_rate) %>%
  left_join(
    usd_value %>% 
      rename(Year = year) %>% 
      dplyr::select(Year, VSL, VSL_low, VSL_high)
  )

cf_scenario <- read_xlsx("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/ecu_cf_ctrfact.xlsx") %>% 
  rename(Year = year) %>% 
  mutate(
    cf_ctr10yr = zoo::na.approx(cf_ctr10yr),
    cf_ctr20yr = zoo::na.approx(cf_ctr20yr)
  )


countries_cf <- read_csv("~/Downloads/access-to-clean-fuels-and-technologies-for-cooking.csv") %>% 
  filter(`Entity` == "Colombia" | 
           `Entity` == "Bolivia" | 
           `Entity` == "Peru" |
           `Entity` == "Ecuador") %>% 
  rename(cf_prop = `Indicator:Proportion of population with primary reliance on clean fuels and technologies for cooking (%) - Residence Area Type:Total`)

# Data and scenario building -------

mortality1 <- 
  ecuador_mortality_population %>% 
  left_join(
    cf_scenario
  ) %>% 
  mutate(
    cf_pop_ctrfact_10yr = cf_ctr10yr*population,
    cf_pop_ctrfact_20yr = cf_ctr20yr*population
  ) %>% 
  mutate(
    polluting_pop_ctrfact_10yr = (1-cf_ctr10yr)*population,
    polluting_pop_ctrfact_20yr = (1-cf_ctr20yr)*population
  ) %>% 
  mutate(
    polluting = 1-cf
  ) %>% 
  mutate(
    cf_pop = cf*population,
    polluting_pop = polluting*population
  )

# sample data frame ------

pm_polluting = rnormTrunc(1, mean = 50, sd = 50/2.5, min = 5, max = 150)
pm_clean = rnormTrunc(1, mean = 25, sd = 10, min = 5, max = pm_polluting)

mortality_long <- 
  mortality1 %>% 
  mutate(
    pm_polluting = pm_polluting[1] - 2.4,
    pm_clean = pm_clean[1] - 2.4
  ) %>% 
  mutate(
    rho_polluting = log(1 + (pm_polluting/1.6)) / (1 + exp((15.5-pm_polluting)/36.8)),
    rho_clean = log(1 + (pm_clean/1.6)) / (1 + exp((15.5-pm_clean)/36.8))
  ) %>% 
  mutate(
    polluting_ln_hazard = (0.1430)*rho_polluting,
    polluting_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_polluting,
    polluting_ln_hazard_low = (0.1430 - 2*0.01807)*rho_polluting,
    clean_ln_hazard = (0.1430)*rho_clean,
    clean_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_clean,
    clean_ln_hazard_low = (0.1430 - 2*0.01807)*rho_clean
  ) %>% 
  mutate(
    polluting_deathrate = (1 - (1/exp(polluting_ln_hazard))) * death_rate,
    polluting_deathrate_hi = (1 - (1/exp(polluting_ln_hazard_hi))) * death_rate,
    polluting_deathrate_low = (1 - (1/exp(polluting_ln_hazard_low))) * death_rate,
    clean_deathrate = (1 - (1/exp(clean_ln_hazard))) * death_rate,
    clean_deathrate_hi = (1 - (1/exp(clean_ln_hazard_hi))) * death_rate,
    clean_deathrate_low = (1 - (1/exp(clean_ln_hazard_low))) * death_rate
  ) %>% 
  mutate(
    polluting_deaths = (polluting_deathrate*polluting_pop)/1000,
    polluting_deaths_hi = (polluting_deathrate_hi*polluting_pop)/1000,
    polluting_deaths_low = (polluting_deathrate_low*polluting_pop)/1000,
    clean_deaths = (clean_deathrate*cf_pop)/1000,
    clean_deaths_hi = (clean_deathrate_hi*cf_pop)/1000,
    clean_deaths_low = (clean_deathrate_low*cf_pop)/1000,
    
    polluting_deaths_ctrfact_10yr = (polluting_deathrate*polluting_pop_ctrfact_10yr)/1000,
    polluting_deaths_ctrfact_10yr_hi = (polluting_deathrate_hi*polluting_pop_ctrfact_10yr)/1000,
    polluting_deaths_ctrfact_10yr_low = (polluting_deathrate_low*polluting_pop_ctrfact_10yr)/1000,
    clean_deaths_ctrfact_10yr = (clean_deathrate*cf_pop_ctrfact_10yr)/1000,
    clean_deaths_ctrfact_10yr_hi = (clean_deathrate_hi*cf_pop_ctrfact_10yr)/1000,
    clean_deaths_ctrfact_10yr_low = (clean_deathrate_low*cf_pop_ctrfact_10yr)/1000,
    
    polluting_deaths_ctrfact_20yr = (polluting_deathrate*polluting_pop_ctrfact_20yr)/1000,
    polluting_deaths_ctrfact_20yr_hi = (polluting_deathrate_hi*polluting_pop_ctrfact_20yr)/1000,
    polluting_deaths_ctrfact_20yr_low = (polluting_deathrate_low*polluting_pop_ctrfact_20yr)/1000,
    clean_deaths_ctrfact_20yr = (clean_deathrate*cf_pop_ctrfact_20yr)/1000,
    clean_deaths_ctrfact_20yr_hi = (clean_deathrate_hi*cf_pop_ctrfact_20yr)/1000,
    clean_deaths_ctrfact_20yr_low = (clean_deathrate_low*cf_pop_ctrfact_20yr)/1000
  ) %>% 
  mutate(
    polluting_deaths_change_ctrfact_10yr = polluting_deaths - polluting_deaths_ctrfact_10yr,
    clean_deaths_change_ctrfact_10yr = clean_deaths - clean_deaths_ctrfact_10yr,
    
    polluting_deaths_change_ctrfact_20yr = polluting_deaths - polluting_deaths_ctrfact_20yr,
    clean_deaths_change_ctrfact_20yr = clean_deaths - clean_deaths_ctrfact_20yr
  ) %>% 
  mutate(
    mortality_change_10yr = polluting_deaths_change_ctrfact_10yr + clean_deaths_change_ctrfact_10yr,
    mortality_change_20yr = polluting_deaths_change_ctrfact_20yr + clean_deaths_change_ctrfact_20yr
  ) %>% 
  mutate(mortality_change_VSL_10yr = mortality_change_10yr * VSL,
         mortality_change_VSL_20yr = mortality_change_20yr * VSL,
         
         mortality_change_VSL_10yr_low = mortality_change_10yr * VSL_low,
         mortality_change_VSL_20yr_low = mortality_change_20yr * VSL_low,
         
         mortality_change_VSL_10yr_high = mortality_change_10yr * VSL_high,
         mortality_change_VSL_20yr_high = mortality_change_20yr * VSL_high) %>% 
  mutate(model = 0)



for (i in 1:1000) {
  
  pm_polluting = rnormTrunc(1, mean = 50, sd = 50/2.5, min = 5, max = 150)
  pm_clean = rnormTrunc(1, mean = 25, sd = 10, min = 5, max = pm_polluting)
  
  mortality2 <- 
    mortality1 %>% 
    mutate(
      pm_polluting = pm_polluting[1] - 2.4,
      pm_clean = pm_clean[1] - 2.4
    ) %>% 
    mutate(
      rho_polluting = log(1 + (pm_polluting/1.6)) / (1 + exp((15.5-pm_polluting)/36.8)),
      rho_clean = log(1 + (pm_clean/1.6)) / (1 + exp((15.5-pm_clean)/36.8))
    ) %>% 
    mutate(
      polluting_ln_hazard = (0.1430)*rho_polluting,
      polluting_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_polluting,
      polluting_ln_hazard_low = (0.1430 - 2*0.01807)*rho_polluting,
      clean_ln_hazard = (0.1430)*rho_clean,
      clean_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_clean,
      clean_ln_hazard_low = (0.1430 - 2*0.01807)*rho_clean
    ) %>% 
    mutate(
      polluting_deathrate = (1 - (1/exp(polluting_ln_hazard))) * death_rate,
      polluting_deathrate_hi = (1 - (1/exp(polluting_ln_hazard_hi))) * death_rate,
      polluting_deathrate_low = (1 - (1/exp(polluting_ln_hazard_low))) * death_rate,
      clean_deathrate = (1 - (1/exp(clean_ln_hazard))) * death_rate,
      clean_deathrate_hi = (1 - (1/exp(clean_ln_hazard_hi))) * death_rate,
      clean_deathrate_low = (1 - (1/exp(clean_ln_hazard_low))) * death_rate
    ) %>% 
    mutate(
      polluting_deaths = (polluting_deathrate*polluting_pop)/1000,
      polluting_deaths_hi = (polluting_deathrate_hi*polluting_pop)/1000,
      polluting_deaths_low = (polluting_deathrate_low*polluting_pop)/1000,
      clean_deaths = (clean_deathrate*cf_pop)/1000,
      clean_deaths_hi = (clean_deathrate_hi*cf_pop)/1000,
      clean_deaths_low = (clean_deathrate_low*cf_pop)/1000,
      
      polluting_deaths_ctrfact_10yr = (polluting_deathrate*polluting_pop_ctrfact_10yr)/1000,
      polluting_deaths_ctrfact_10yr_hi = (polluting_deathrate_hi*polluting_pop_ctrfact_10yr)/1000,
      polluting_deaths_ctrfact_10yr_low = (polluting_deathrate_low*polluting_pop_ctrfact_10yr)/1000,
      clean_deaths_ctrfact_10yr = (clean_deathrate*cf_pop_ctrfact_10yr)/1000,
      clean_deaths_ctrfact_10yr_hi = (clean_deathrate_hi*cf_pop_ctrfact_10yr)/1000,
      clean_deaths_ctrfact_10yr_low = (clean_deathrate_low*cf_pop_ctrfact_10yr)/1000,
      
      polluting_deaths_ctrfact_20yr = (polluting_deathrate*polluting_pop_ctrfact_20yr)/1000,
      polluting_deaths_ctrfact_20yr_hi = (polluting_deathrate_hi*polluting_pop_ctrfact_20yr)/1000,
      polluting_deaths_ctrfact_20yr_low = (polluting_deathrate_low*polluting_pop_ctrfact_20yr)/1000,
      clean_deaths_ctrfact_20yr = (clean_deathrate*cf_pop_ctrfact_20yr)/1000,
      clean_deaths_ctrfact_20yr_hi = (clean_deathrate_hi*cf_pop_ctrfact_20yr)/1000,
      clean_deaths_ctrfact_20yr_low = (clean_deathrate_low*cf_pop_ctrfact_20yr)/1000
    ) %>% 
  mutate(
    polluting_deaths_change_ctrfact_10yr = polluting_deaths - polluting_deaths_ctrfact_10yr,
    clean_deaths_change_ctrfact_10yr = clean_deaths - clean_deaths_ctrfact_10yr,
    
    polluting_deaths_change_ctrfact_20yr = polluting_deaths - polluting_deaths_ctrfact_20yr,
    clean_deaths_change_ctrfact_20yr = clean_deaths - clean_deaths_ctrfact_20yr
  ) %>% 
    mutate(
      mortality_change_10yr = polluting_deaths_change_ctrfact_10yr + clean_deaths_change_ctrfact_10yr,
      mortality_change_20yr = polluting_deaths_change_ctrfact_20yr + clean_deaths_change_ctrfact_20yr
    ) %>% 
    mutate(mortality_change_VSL_10yr = mortality_change_10yr * VSL,
           mortality_change_VSL_20yr = mortality_change_20yr * VSL,
           
           mortality_change_VSL_10yr_low = mortality_change_10yr * VSL_low,
           mortality_change_VSL_20yr_low = mortality_change_20yr * VSL_low,
           
           mortality_change_VSL_10yr_high = mortality_change_10yr * VSL_high,
           mortality_change_VSL_20yr_high = mortality_change_20yr * VSL_high) %>% 
    mutate(model = i)
  

  
      
  mortality_long <-
    mortality_long %>% 
    bind_rows(mortality2)
  
  print(i)
  
  
}

write_rds(mortality_long, "~/Downloads/ecuador_subsidy_boots_mortality_long_63.rds")

# summary(tableby(~
#           pm_clean + 
#           pm_polluting + 
#           mortality_change_10yr + 
#           mortality_change_VSL_10yr + 
#           mortality_change_20yr + 
#           mortality_change_VSL_20yr,
#         mortality_averted %>% filter(!is.na(pm_clean)),control=mycontrols),text = "latex")



# 1990 to present counterfactual ------

pm_polluting = rlnormTrunc(1, meanlog = log(50), sdlog = log(50/2.5), min = 2.5, max = 100)
pm_clean = rlnormTrunc(1, meanlog = log(25), sdlog = log(10), min = 2.5, max = pm_polluting)

mortality2_1990 <- 
  mortality1 %>% 
  filter(year>=1990)  %>% 
  mutate(
    cf_ctrfact_1990 = 0.68
  ) %>% 
  mutate(
    cf_pop_ctrfact_1990 = cf_ctrfact_1990*population
  ) %>% 
  mutate(
    polluting_pop_ctrfact = (1-cf_pop_ctrfact)*population,
    polluting_pop_ctrfact_1990 = (1-cf_ctrfact_1990)*population
  ) %>% 
  mutate(
    pm_polluting = 50,
    pm_clean = 25
  ) %>% 
  mutate(
    rho_polluting = log(1 + (pm_polluting/1.6)) / (1 + exp((15.5-pm_polluting)/36.8)),
    rho_clean = log(1 + (pm_clean/1.6)) / (1 + exp((15.5-pm_clean)/36.8))
  ) %>% 
  mutate(
    polluting_ln_hazard = (0.1430)*rho_polluting,
    polluting_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_polluting,
    polluting_ln_hazard_low = (0.1430 - 2*0.01807)*rho_polluting,
    clean_ln_hazard = (0.1430)*rho_clean,
    clean_ln_hazard_hi = (0.1430 + 2*0.01807)*rho_clean,
    clean_ln_hazard_low = (0.1430 - 2*0.01807)*rho_clean
  ) %>% 
  mutate(
    polluting_deathrate = (1 - (1/exp(polluting_ln_hazard))) * death_rate,
    polluting_deathrate_hi = (1 - (1/exp(polluting_ln_hazard_hi))) * death_rate,
    polluting_deathrate_low = (1 - (1/exp(polluting_ln_hazard_low))) * death_rate,
    clean_deathrate = (1 - (1/exp(clean_ln_hazard))) * death_rate,
    clean_deathrate_hi = (1 - (1/exp(clean_ln_hazard_hi))) * death_rate,
    clean_deathrate_low = (1 - (1/exp(clean_ln_hazard_low))) * death_rate
  ) %>% 
  mutate(
    polluting_deaths = (polluting_deathrate*polluting_pop)/1000,
    polluting_deaths_hi = (polluting_deathrate_hi*polluting_pop)/1000,
    polluting_deaths_low = (polluting_deathrate_low*polluting_pop)/1000,
    clean_deaths = (clean_deathrate*cf_pop)/1000,
    clean_deaths_hi = (clean_deathrate_hi*cf_pop)/1000,
    clean_deaths_low = (clean_deathrate_low*cf_pop)/1000,
    
    polluting_deaths_ctrfact = (polluting_deathrate*polluting_pop_ctrfact)/1000,
    clean_deaths_ctrfact = (clean_deathrate*cf_pop_ctrfact)/1000,
    
    polluting_deaths_ctrfact_1990 = (polluting_deathrate*polluting_pop_ctrfact_1990)/1000,
    clean_deaths_ctrfact_1990 = (clean_deathrate*cf_pop_ctrfact_1990)/1000,
    
  ) %>% 
  mutate(
    polluting_deaths_change_ctrfact = polluting_deaths - polluting_deaths_ctrfact,
    clean_deaths_change_ctrfact = clean_deaths - clean_deaths_ctrfact,
    polluting_deaths_change_ctrfact_1990 = polluting_deaths - polluting_deaths_ctrfact_1990,
    clean_deaths_change_ctrfact_1990 = clean_deaths - clean_deaths_ctrfact_1990
  ) %>% 
  mutate(
    mortality_change = polluting_deaths_change_ctrfact + clean_deaths_change_ctrfact,
    mortality_change_1990 = polluting_deaths_change_ctrfact_1990 + clean_deaths_change_ctrfact_1990
  ) %>% 
  mutate(
    mortality_ctrfact = mortality_change - mortality_change_1990
  ) %>% 
  mutate(mortality_change_VSL = mortality_change * VSL)

# view(mortality2_1990)
sum(mortality2_1990$mortality_change_1990)

# Compare to GBD estimates -------

gbd_hap_1990 <- 
  gbd_hap %>% 
  filter(rei_id==87 & year==1990) %>% 
  dplyr::select(val)

gbd_hap_total <-
  gbd_hap %>% 
  filter(rei_id==87) %>% 
  summarize(total = sum(val))

gbd_hap_total - (gbd_hap_1990*30)


# SUMMARY FIGS -----

# Summary figures ------

countries_cf_1 <- countries_cf %>% 
  bind_rows(mortality2 %>% 
              rename(cf_prop = cf) %>% 
              mutate(cf_prop = cf_prop*100) %>% 
              mutate(`Entity`="Ecuador (our data)"))

country_trajectories <- ggplot() + 
  geom_line(data=countries_cf %>% 
              bind_rows(mortality2 %>%
                          rename(cf_prop = cf,
                                 Year = year) %>%
                          mutate(cf_prop = cf_prop*100) %>%
                          mutate(`Entity`="Ecuador (our data)")) %>%
              bind_rows(mortality2 %>% 
                          rename(cf_prop = cf,
                                 Year = year) %>% 
                          mutate(cf_prop = cf_ctr10yr*100) %>% 
                          mutate(`Entity`="Ecuador (10 y behind)")) %>% 
              bind_rows(mortality2 %>% 
                          rename(cf_prop = cf,
                                 Year = year) %>% 
                          mutate(cf_prop = cf_ctr20yr*100) %>% 
                          mutate(`Entity`="Ecuador (20 y behind)")), aes(x=Year, y=cf_prop/100, group=`Entity`, color=`Entity`)) +
  scale_color_aaas() + 
  scale_y_continuous(labels=scales::percent_format(),
                     breaks=c(0, .25, .5, .75, 1)) + 
  xlab("") + ylab("Primary clean cooking fuel use") + 
  coord_cartesian(ylim=c(0,1)) + 
  ggtitle("Clean cooking fuel trajectories") + 
  theme_bw() 



country_trajectories <- ggplot() + 
  geom_line(data=countries_cf, aes(x=Year, y=cf_prop/100, group=`Entity`, color=`Entity`)) +
  scale_color_aaas() + 
  scale_y_continuous(labels=scales::percent_format(),
                     breaks=c(0, .25, .5, .75, 1)) + 
  xlab("") + ylab("Primary clean cooking fuel use") + 
  coord_cartesian(ylim=c(0,1)) + 
  ggtitle("Clean cooking fuel trajectories") + 
  theme_bw() 



ecuador_trajectories <- ggplot() + 
  geom_line(data=cf_scenario, aes(x=Year, y=cf)) +
  geom_line(data=cf_scenario, aes(x=Year, y=cf_ctr10yr), color="dodgerblue4") + 
  geom_line(data=cf_scenario, aes(x=Year, y=cf_ctr20yr), color="tomato4") +
  annotate("text", x=1990, y=0.75, label="Observed") + 
  annotate("text", x=1991, y=.45, label="10 y behind", color="dodgerblue4") + 
  annotate("text", x=2006, y=0.3, label="20 y behind\n(preferred)", color="tomato4") + 
  scale_y_continuous(labels=scales::percent_format(),
                     breaks=c(0, .25, .5, .75, 1)) + 
  xlab("") + ylab("Primary clean cooking fuel use") + 
  coord_cartesian(ylim=c(0,1)) + 
  ggtitle("Counterfactual clean cooking fuel trajectories in Ecuador") + 
  theme_bw()


ecuador_pop <-  ggplot() + 
  geom_line(data=mortality2, aes(x=year, y=population/1000000)) +
  xlab("") + ylab("Population (millions)") + 
  coord_cartesian(ylim=c(0,18)) +
  # ggtitle("Clean cooking fuel trajectories") + 
  theme_bw()

ecuador_mortality_rate <-  ggplot() + 
  geom_line(data=mortality2, aes(x=year, y=death_rate)) +
  xlab("") + ylab("Mortality rate per 1,000") + 
  coord_cartesian(ylim=c(0,10)) +
  # ggtitle("Clean cooking fuel trajectories") + 
  theme_bw()


ecuador_summary_stats <- 
  plot_grid(
    ecuador_pop,
    ecuador_mortality_rate, 
    nrow=2,
    align="hv"
  )

# cowplot::ggsave2("~/Desktop/ecuador_summary_stats.pdf",
#                  ecuador_summary_stats,
#                  width = 150,
#                  height = 200,
#                  units = "mm",
#                  dpi = 300)
# 
# 
# 
# cowplot::ggsave2("~/Desktop/ecuador_trajectories.pdf",
#                  ecuador_trajectories,
#                  width = 150,
#                  height = 100,
#                  units = "mm",
#                  dpi = 300)
# 
# 
# cowplot::ggsave2("~/Desktop/country_trajectories.pdf",
#                  country_trajectories,
#                  width = 150,
#                  height = 100,
#                  units = "mm",
#                  dpi = 300)


